***** Is Europe an optimal Political Area?                *****
***** Alberto Alesina, Guido Tabellini & Francesco Trebbi *****

***** Section IV.c

* --- Session setup ---------------------------------------------------------
* Start from an empty session and switch off output paging.
clear all
set more off

* Memory and matrix-size settings
set mem 10g
set matsize 11000

* Directory that receives the tables and figures written below
capture mkdir "section4"

* Load the input dataset
use "data/EVS_GSS_final.dta", clear


* Restrict the sample to the 16 European countries used in this section
* (no Eastern Europe, Germany not split). The US is excluded because distances
* are computed for European countries only.
* inlist() accepts at most nine string values per call, hence the two calls.
keep if inlist(country, "AT", "BE", "DE", "DK", "ES", "FI", "FR", "GR", "IE") ///
      | inlist(country, "IT", "LU", "NL", "NO", "PT", "SE", "GB-GBN")

** Expand categorical covariates into indicator variables X_<name>1, X_<name>2, ...
foreach catvar in marital_status education employment income_recoded size_of_town {
	quietly tabulate `catvar', generate(X_`catvar')
}
* education1 uses the separate stub X_education_v
quietly tabulate education1, generate(X_education_v)
 
** Assign the survey wave from the interview year
generate wave = .
replace wave = 1 if inlist(year, 1981, 1982, 1983, 1984, 1986)
replace wave = 2 if inlist(year, 1990, 1991, 1993)
replace wave = 3 if inlist(year, 1998, 1999, 2000)
replace wave = 4 if inlist(year, 2006, 2008, 2009, 2010)

* Discard years that belong to no wave, then retain the fourth wave only
drop if wave == .
keep if wave == 4

* Shorten variable names
* (presumably so that the prefixed variables generated further below stay within
*  Stata's variable-name length limit -- confirm before changing any of these names)
rename (Y_workingfemale Y_careerfemale Y_preschoolmother Y_eu_fear_socsec Y_eu_fear_natid Y_eu_fear_tax Y_eu_fear_lowpow Y_eu_fear_unem) ///
       (wfemale         cfemale        pmother           eu_socsec        eu_natid        eu_tax        eu_lowpow        eu_unem)


* EU sentiment index
* Principal-component factor analysis (pcf) on the five EU-fear items, retaining a
* single factor; after the varimax rotation the factor score is stored in eu_sentiment_2.
global eu_sent_2 eu_socsec eu_natid eu_tax eu_lowpow eu_unem
factor $eu_sent_2 , pcf factor(1) 
rotate, varimax
predict eu_sentiment_2
* Variable labels are limited to 80 characters, and the label should name only the
* items that actually enter the factor analysis (Y_eu_belonging is not among them).
la var eu_sentiment_2 "EU sentiment (no country_pride): 1st factor of the 5 eu_fear items"

* Unit of analysis: the country identifier is overwritten with area2
drop country
generate country = area2

* Cultural traits
global ecult2 Y_obedience Y_trust Y_ideology Y_religious Y_divorce Y_euthan Y_suicide ///
              Y_altruism Y_hardwork Y_redistrib wfemale cfemale pmother Y_private ///
              Y_equalize Y_control Y_gay Y_god Y_abortion

* Individual-level covariates
global ecov2 X_age X_sex X_marital_status* ///
             X_employment1 X_employment2 X_employment3 X_employment4 X_employment5 X_employment6 ///
             X_income_recoded1 X_income_recoded2 X_income_recoded3 ///
             X_size_of_town* ///
             X_education1 X_education2 X_education3 X_education4 X_education5 X_education6

**** Variable lists used in the remainder of the script
global cult      $ecult2
global eu        eu_sentiment_2
global cov       $ecov2
global labellist "Full"

*******************************************************************************
* Sample preparation
* Up to N respondents per country-wave are drawn at random further below; the
* Stata version, N and the random-number seed are fixed here.
*******************************************************************************
version 13
global N = 500
set seed 364011739

* Complete cases only: cultural traits, covariates and both area codes must be observed
foreach v of varlist $cult $cov area2code area3code {
	keep if !missing(`v')
}
* The EU-sentiment variable must be observed as well, except for rows whose
* country equals "NO", which are retained even when it is missing
foreach v of varlist $eu {
	drop if missing(`v') & country != "NO"
}
* Standardize every variable (mean 0, sd 1) so that all of them receive equal
* weight in the distance measures computed below
foreach v of varlist $cult $cov $eu {
	quietly summarize `v'
	replace `v' = (`v' - r(mean)) / r(sd)
}

* Residualize each cultural trait on the covariates.
* For every variable in $cult an OLS regression on $cov is run and the residual
* is stored as res_cult_<var>.
* (The former global counter i and its empty if/else branches did nothing and were removed.)
foreach var of varlist $cult {
	reg `var' $cov
	predict res_cult_`var', resid
}

* Same procedure for the EU-sentiment variable(s) in $eu; residuals are stored as res_eu_<var>.
foreach var of varlist $eu {
	reg `var' $cov
	predict res_eu_`var', resid
}

* Random subsample: within each country-wave cell keep the $N observations with
* the smallest uniform draws (cells with fewer than $N observations are kept whole)
generate random = runiform()
sort country wave random
by country wave: drop if _n > $N
saveold "data_to_use_EU_centroid.dta" , replace

* Cultural distance of each respondent from the sample mean
*   gk = 1 - exp( - alfa * sum_k (x_k - mean_k)^2 ),   alfa = 1 / (2*(K-1)),
* where K is the number of cultural traits in $cult.
* Two versions are built:
*   gk_cult      from the standardized traits themselves
*   gk_cult_res  from the covariate-adjusted residuals res_cult_*
unab cult_raw : $cult
unab cult_res : res_cult_*

* K is obtained by counting the expanded list (no need to preserve/keep/describe)
local K : word count `cult_raw'
tempname alfa
scalar `alfa' = 1/(2*(`K' - 1))

* Generate Distance 
foreach var of varlist `cult_raw' `cult_res' {
	egen mean_`var'=mean(`var')
	gen diff_`var'=`var'-mean_`var'
	gen sq_diff_`var'=diff_`var'^2
	gen asqdiffcult_`var'=`alfa'*sq_diff_`var'
}

* The raw-trait terms are listed explicitly: the wildcard asqdiffcult_* would also
* match asqdiffcult_res_cult_* and thereby add the residual terms to the raw
* distance. NOTE: gk_cult therefore differs from what a wildcard sum would give.
local raw_terms
foreach var of local cult_raw {
	local raw_terms `raw_terms' asqdiffcult_`var'
}
egen sum_sq_diff_cult = rowtotal(`raw_terms')
gen gk_cult= 1 - exp(-sum_sq_diff_cult)

* Residual-based version: only the asqdiffcult_res_* terms
egen sum_sq_diff_cult_res=rowtotal(asqdiffcult_res_*)
gen gk_cult_res=1-exp(-sum_sq_diff_cult_res)

* Covariate distance of each respondent from the sample mean
*   gk_cov = 1 - exp( - w * sum_k (x_k - mean_k)^2 ),   w = 1 / (2*(K-1)),
* with K the number of variables that $cov expands to.
unab covlist : $cov
local covlist : list uniq covlist
local ncov : list sizeof covlist
tempname bw_cov
scalar `bw_cov' = 1/(2*(`ncov' - 1))

foreach x of varlist `covlist' {
	egen mean_`x' = mean(`x')
	generate diff_`x' = `x' - mean_`x'
	generate sq_diff_`x' = diff_`x'^2
	generate asqdiffcov_`x' = `bw_cov'*sq_diff_`x'
}

egen sum_sq_diff_cov = rowtotal(asqdiffcov_*)
generate gk_cov = 1 - exp(-sum_sq_diff_cov)

* Overwrite the working file, now including all distance measures
saveold "data_to_use_EU_centroid", replace

** Prepare data for Maps
* Coordinates file: drop points with _X <= -50 or _Y <= -10
use "data/CCEU", clear
drop if _X<=-50 | _Y<=-10
saveold "CCEU_2", replace

* Region attributes: keep the regions whose NUTS id starts with one of the listed
* two-letter prefixes (inlist() takes at most nine string values per call)
use "data/regions", clear
gen NUTS_2_initial=substr(NUTS_id,1,2)
keep if inlist(NUTS_2_initial, "AT", "BE", "DE", "DK", "ES", "FI", "FR", "EL", "IE") ///
      | inlist(NUTS_2_initial, "IT", "LU", "NL", "NO", "PT", "SE", "UK", "CH")
* regions_2 is later read by an old-syntax match merge on NUTS_id, which requires
* the using file to be sorted by the key; sort here instead of relying on the
* order in which data/regions happens to be stored.
sort NUTS_id
saveold regions_2, replace

* Attach the region attributes to the individual-level file and average by NUTS region
use "data_to_use_EU_centroid", clear
generate NUTS_id = ""
replace NUTS_id = NUTS_2_code
sort NUTS_id
merge NUTS_id using regions_2

* Rows belonging to NO and CH are pooled under a single country-level id
foreach cc in NO CH {
	replace NUTS_id = "`cc'" if country == "`cc'" | NUTS_2_initial == "`cc'"
}
keep if STAT_LEVL_ == 2 | inlist(country, "NO", "CH")

* Region means of the cultural traits, EU sentiment, the distance measures and area2code
collapse (mean) $cult $eu gk* area2code, by(NUTS_id)
* Regions with no value for gk_cult receive the placeholder 1000
* (presumably regions without survey respondents -- confirm)
replace gk_cult = 1000 if missing(gk_cult)
rename NUTS_id NUTS_ID

* Figure 12
export excel using "section4/figure_12.xls", replace firstrow(variables)
***

** Correlation between cultural distance and EU sentiment
use "data_to_use_EU_centroid", clear

* Min-max rescaling of eu_sentiment_2 to the interval [0,1]
quietly summarize eu_sentiment_2
generate eu_sent_norm = (eu_sentiment_2 - r(min)) / (r(max) - r(min))
label variable eu_sent_norm "Variable rescaled from 0 to 1"

* Table 2
* Four specifications with standard errors clustered on area3:
*   1) no controls   2) covariates   3) covariates + area2code dummies
*   4) covariates + area3code dummies
* The first column creates the output file, the others are appended to it.
local controls1
local controls2 $cov
local controls3 $cov i.area2code
local controls4 $cov i.area3code

forvalues s = 1/4 {
	local mode = cond(`s' == 1, "replace", "append")
	regress eu_sent_norm gk_cult `controls`s'', cluster(area3)
	outreg2 gk_cult using section4/table_2.xls, bd(4) `mode'
}
***

* Housekeeping: delete the intermediate files written by this script
* (section4/table_2.txt is presumably the text by-product of outreg2 -- confirm)
foreach tmpfile in "data_to_use_EU_centroid.dta" "CCEU_2.dta" "regions_2.dta" "section4/table_2.txt" {
	erase "`tmpfile'"
}
